// Start from an empty session, fix the random-number seed from the global
// `seed`, and reserve the temporary file that later holds the stacked
// leave-one-region-out results.
clear all
set seed ${seed}
tempfile tempsave


// Load the CPS extract (described as covering 1975-2000), keeping only
// survey years after 1975.
use if year > 1975 using "${rawdata}cps/cps.dta", clear

/* Age: inspect the distribution, including missing values */
tabulate age, missing

/* Annual earnings (incwage) */
* Recode the value 99999999 to missing; the detailed summaries before and
* after the recode make its effect visible in the log.
summarize incwage, detail
replace incwage = . if incwage == 99999999
summarize incwage, detail


	* Scale top-coded incomes by 1.5, as in Shenhav 2020 (p.24).
	* Within each year, the largest observed incwage is treated as the top code.
	* https://cps.ipums.org/cps/topcodes_tables.shtml
foreach yr of numlist 1976/1995 {
	summarize incwage if year == `yr'
	replace incwage = 1.5 * incwage if incwage == `r(max)' & year == `yr'
	summarize incwage if year == `yr'
}

	* Adjust for inflation by multiplying with the cpi99 factor.
	* https://cps.ipums.org/cps/cpi99.shtml
replace incwage = cpi99 * incwage

/* Annual hours worked: weeks worked times usual weekly hours */
tabulate wkswork1, missing
tabulate uhrsworkly, missing

* Observations with uhrsworkly == 999 are excluded, so their hours stay missing.
generate hours = uhrsworkly * wkswork1 if uhrsworkly != 999

/* Winsorize the hours distribution at the 99th percentile, separately by year */
gstats winsor hours, by(year) cuts(0 99) replace

/* Hourly wage: annual earnings divided by annual hours */
generate wage = incwage / hours
bysort year: summarize wage

	* Top-cut the hourly wage, as in Shenhav 2020 (p.24).
	* The cap is computed once and held in a temporary scalar.
tempname wage_cap
scalar `wage_cap' = (629974 * 1.5) / 1400
replace wage = `wage_cap' if wage > `wage_cap' & !missing(wage)
bysort year: summarize wage

/* Industry: collapse the 1990 industry codes (ind1990) into 17 groups. */
* Compare https://usa.ipums.org/usa/volii/ind1990.shtml
*
* The existing variable ind is overwritten with the group number (1-17).
* Codes not listed in any group below keep ind missing.
* Each ind1990 code must appear in exactly one list: the replace statements
* run in order, so a code listed twice silently ends up in the later group.
tab ind1990, m
rename ind1990 ind_old

* Reset ind; assumes a variable named ind already exists in the extract.
replace ind = .
* Codes 0 and 939 are deliberately left unclassified (no-op after the reset,
* kept to make the intent explicit).
replace ind = .  if inlist(ind_old,0,939)
// (1)  Agriculture, forestry, and fishing
replace ind = 1  if inlist(ind_old,010,011,012,020,030,031,032)
// (2)  Mining
replace ind = 2  if inlist(ind_old,040,041,042,050)
// (3)  Construction
replace ind = 3  if inlist(ind_old,060)
// (4)  Low Tech Manufacturing
replace ind = 4  if inlist(ind_old,100,101,102,110,111,112,120,121,122,130,132,140,141,142,150,151,152,220,221,222,230,231,232,241,242,250,251,252,261,262)
// (5)  Basic Tech Manufacturing
// Code 130 was previously also listed here, which overrode its assignment
// to group 4 above; it now stays in group 4 only.
replace ind = 5  if inlist(ind_old,160,161,162,171,172,270,271,272,280,281,282,290,291,292,300,301,310,311,312,320,321,322,331,332,340,341,342,350,351,360,361,370,390,391,392)
// (6)  High Tech Manufacturing
replace ind = 6  if inlist(ind_old,180,181,182,190,191,192,200,201,210,211,212,352,362,371,372,380,381)
// (7)  Transportation
replace ind = 7  if inlist(ind_old,400,401,402,410,411,412,420,421,422,432)
// (8)  Communications
replace ind = 8  if inlist(ind_old,440,441,442)
// (9)  Utilities
replace ind = 9  if inlist(ind_old,450,451,452,470,471,472)
// (10) Wholesale Trade
replace ind = 10 if inlist(ind_old,500,501,502,510,511,512,521,530,531,532,540,541,542,550,551,552,560,561,562,571)
// (11) Retail Trade
replace ind = 11 if inlist(ind_old,580,581,582,590,591,592,600,601,602,610,611,612,620,621,622,623,630,631,632,633,640,641,642,650,651,652,660,661,662,663,670,671,672,681,682,691)
// (12) Finance
replace ind = 12 if inlist(ind_old,700,701,702,710,711,712)
// (13) Business and Repair Services
replace ind = 13 if inlist(ind_old,721,722,731,732,740,741,742,750,751,752,760)
// (14) Personal Services
replace ind = 14 if inlist(ind_old,761,762,770,771,772,780,781,782,790,791)
// (15) Entertainment and Recreation Services
replace ind = 15 if inlist(ind_old,800,801,802,810)
// (16) Professional Services
replace ind = 16 if inlist(ind_old,812,820,821,822,830,831,832,840,841,842,850,851,852,860,861,862,863,870,871,872,873,880,881,882,890,891,892,893)
// (17) Public Administration
replace ind = 17 if inlist(ind_old,900,901,910,921,922,930,931,932)
tab ind, m

// Occupation in 1990 Codes.
// Collapses occ1990 into 21 groups using consecutive, non-overlapping code
// ranges. The existing variable occ is overwritten with the group number;
// codes that match no range keep occ missing. Leading zeros in the
// thresholds (e.g. 023) carry no special meaning.
tab occ1990, m
rename occ1990 occ_old

// Reset occ; assumes a variable named occ already exists in the extract.
replace occ = .
// No occupation or military.
// (No-op after the reset above; kept to make the intent explicit.)
replace occ = .  if occ_old == 0  | occ_old > 900
// (1)  Executive, Administrative, and Managerial Occupations
replace occ = 1  if occ_old > 0   & occ_old < 023
// (2)  Management Related Occupations
replace occ = 2  if occ_old > 022 & occ_old < 038
// (3)  Professional Specialty Occupations
replace occ = 3  if occ_old > 037 & occ_old < 200
// (4)  Technicians and Related Support Occupations
replace occ = 4  if occ_old > 199 & occ_old < 209
// (5)  Technologists and Technicians, Except Health
replace occ = 5  if occ_old > 208 & occ_old < 236
// (6)  Sales Occupations
replace occ = 6  if occ_old > 235 & occ_old < 291
// (7)  Administrative Support Occupations, Including Clerical
replace occ = 7  if occ_old > 290 & occ_old < 392
// (8)  Private Household Occupations
replace occ = 8  if occ_old > 391 & occ_old < 409
// (9)  Protective Service Occupations
replace occ = 9  if occ_old > 408 & occ_old < 428
// (10) Service Occupations, Except Protective and Household
replace occ = 10 if occ_old > 427 & occ_old < 470
// (11) Farm Operators and Managers
replace occ = 11 if occ_old > 469 & occ_old < 477
// (12) Other Agricultural and Related Occupations
replace occ = 12 if occ_old > 476 & occ_old < 500
// (13) Mechanics and Repairers, Except Supervisors
// NOTE(review): codes 500-503 fall between groups 12 and 13 and therefore
// stay missing; such observations are removed later when rows with any
// missing key variable are dropped. Confirm this gap is intended.
replace occ = 13 if occ_old > 503 & occ_old < 550
// (14) Construction Trades
replace occ = 14 if occ_old > 549 & occ_old < 600
// (15) Extractive Occupations
replace occ = 15 if occ_old > 599 & occ_old < 618
// (16) Precision Production Occupations
replace occ = 16 if occ_old > 617 & occ_old < 700
// (17) Machine Operators, Assemblers, and Inspectors: Machine Operators and Tenders, Except Precision
replace occ = 17 if occ_old > 699 & occ_old < 780
// (18) Fabricators, Assemblers, and Hand Working Occupations
replace occ = 18 if occ_old > 779 & occ_old < 796
// (19) Production Inspectors, Testers, Samplers, and Weighers
replace occ = 19 if occ_old > 795 & occ_old < 800
// (20) Transportation and Material Moving Occupations
replace occ = 20 if occ_old > 799 & occ_old < 816
// (21) Transportation Occupations, Except Motor Vehicles
// Codes 891-900 match no range and also stay missing.
replace occ = 21 if occ_old > 815 & occ_old < 891
tab occ, m

/* Census regions from state FIPS codes: 1 = North-East, 2 = Midwest, 3 = South, 4 = West */
* Source: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf
tabulate statefip, missing

* State codes are listed in ascending order; codes in none of the lists
* leave cregion missing.
generate cregion = .
* 1: North-East
replace cregion = 1 if inlist(statefip, 9, 23, 25, 33, 34, 36, 42, 44, 50)
* 2: Midwest
replace cregion = 2 if inlist(statefip, 17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55)
* 3: South
replace cregion = 3 if inlist(statefip, 1, 5, 10, 11, 12, 13, 21, 22, 24, 28, 37, 40, 45, 47, 48, 51, 54)
* 4: West
replace cregion = 4 if inlist(statefip, 2, 4, 6, 8, 15, 16, 30, 32, 35, 41, 49, 53, 56)
tabulate cregion, missing

/* Constant equal to 1 for every observation; serves as the grouping key
   for the overall (all-cells) aggregate. */
generate total = 1

// Keep complete cases only: count missing values across the key variables,
// drop every row with at least one, then keep just those variables and save.
local keepvars year ind occ wage cregion total
gegen obsdrop = rowmiss(`keepvars')
tabulate obsdrop, missing
drop if obsdrop != 0
keep `keepvars'
save "${temp}cps.dta", replace

/* Create national wage and employment series per cell, jackknifed by
   leaving out one census region at a time.

   For each grouping g in {io, i, o, tot} (industry x occupation, industry,
   occupation, overall) and each census region r, the mean wage and the
   observation count are computed by year and cell from all observations
   outside region r. The result is stored on rows with cregion == r.
   The stacked results are merged onto a full year x region x cell grid so
   that every cell exists; cells without data get wage and employment 0.
   Output: "${temp}dev_<g>.dta" for each grouping.

   Assumption: the grid enumerates cregion, ind, occ and total as 1..N,
   where N is the number of distinct values, and year as N consecutive
   values starting at the first sample year. Codes that are not a gap-free
   1..N sequence would produce grid cells that do not line up with the data. */
local groups 	io i o tot
local v_io		ind occ
local v_i 		ind
local v_o		occ
local v_tot 	total

/* Number of distinct values per dimension, stored in globals size_<var>. */
local vars year cregion ind occ total
foreach x of local vars{
	gdistinct `x'
	global size_`x'=r(ndistinct)
}

/* First sample year, read from the data instead of being hard-coded, so the
   synthetic grid below stays aligned if the sample start changes. */
summarize year, meanonly
local first_year = r(min)
local last_year = r(max)
if (`last_year' - `first_year' + 1) != ${size_year} {
	di as error "Warning: year is not a contiguous sequence; the synthetic grid assumes consecutive years."
}

foreach g of local groups{
	/*Create wage and employment trends per group at national level (jackknife exclusion of regional units)*/
	use year cregion `v_`g'' wage using "${temp}cps.dta", clear
	forvalues r=1/${size_cregion}{

		/* Work on a copy: the full data are restored for the next region. */
		preserve
		di "Region:" "`r'"
		drop if cregion==`r'
		/* Mean wage and number of observations per year and cell. */
		gcollapse (mean) wage (count) empl=year, by(year `v_`g'')
		/* Tag the result with the region that was left out. */
		gen cregion=`r'

		rename (wage empl) (wage_`g'  empl_`g')

		keep cregion year wage_* empl_* `v_`g''

		/* Stack onto the results of the previous regions; the first region
		   starts a fresh file, overwriting whatever an earlier group left. */
		if `r'>1{
			append using "`tempsave'"
		}
		save "`tempsave'", replace
		restore
	}

	/*Create synthetic dataset to check existence of all cells*/
	clear
	set obs ${size_year}
	gen year=_n+`first_year'-1
	expand ${size_cregion}
	bysort year: gen cregion=_n
	/* Expand by each cell dimension in turn; gg accumulates the dimensions
	   already added so that numbering restarts within each combination. */
	local gg
	foreach m of local v_`g'{
		expand ${size_`m'}
		bysort year cregion `gg': gen `m'=_n
		local gg `gg' `m'
	}

	merge 1:1 year cregion `v_`g'' using "`tempsave'", nogen keep(match master)

	/*Replace missing cells with 0*/
	foreach x of varlist wage_* empl_*{
		replace `x'=0 if missing(`x')
	}

	save "${temp}dev_`g'.dta", replace

}





